What version of R is being used?
print(version)
## _
## platform x86_64-apple-darwin15.6.0
## arch x86_64
## os darwin15.6.0
## system x86_64, darwin15.6.0
## status
## major 3
## minor 6.0
## year 2019
## month 04
## day 26
## svn rev 76424
## language R
## version.string R version 3.6.0 (2019-04-26)
## nickname Planting of a Tree
citation(package = "base", lib.loc = NULL, auto = NULL)
##
## To cite R in publications use:
##
## R Core Team (2019). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2019},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
Citation of packages being used:
citation("nlme")
##
## Pinheiro J, Bates D, DebRoy S, Sarkar D, R Core Team (2019).
## _nlme: Linear and Nonlinear Mixed Effects Models_. R package
## version 3.1-140, <URL: https://CRAN.R-project.org/package=nlme>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {{nlme}: Linear and Nonlinear Mixed Effects Models},
## author = {Jose Pinheiro and Douglas Bates and Saikat DebRoy and Deepayan Sarkar and {R Core Team}},
## year = {2019},
## note = {R package version 3.1-140},
## url = {https://CRAN.R-project.org/package=nlme},
## }
library(ggplot2)
library(viridis)
library(dplyr)
library(gstat) # variogram()
library(nlme)
library(pander)
library(MuMIn)
library(lme4)
library(rgdal)
library(psych)
library(car)
library(ggraph)
library(igraph)
damage <- read.csv("data/damage.csv")
site_loc <- read.csv("data/site_loc.csv")
uk_outline <- readOGR("data/GBR_adm", "GBR_adm1")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/johngodlee/google_drive/postgrad/extra_projects/weevils/analysis/data/GBR_adm", layer: "GBR_adm1"
## with 4 features
## It has 9 fields
## Integer64 fields read as strings: ID_0 ID_1
europe_outline <-readOGR("data/NUTS_RG_10M_2016_4326_LEVL_0/",
"NUTS_RG_10M_2016_4326_LEVL_0")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/johngodlee/google_drive/postgrad/extra_projects/weevils/analysis/data/NUTS_RG_10M_2016_4326_LEVL_0", layer: "NUTS_RG_10M_2016_4326_LEVL_0"
## with 37 features
## It has 5 fields
cg_loc <- data.frame(long = -3.21, lat = 55.86)
Match site_location information from site_loc with individuals in damage, using site_code and population as a key, respectively
damage_full <- left_join(site_loc, damage, by = c("site_code" = "population"))
names(damage_full) <- c("site_name", "seed_zone", "site_code",
"big_region", "dec_latitude", "dec_longitude",
"site_area_ha", "gsl", "growing_deg_days_c",
"feb_mean_temp_c", "jul_mean_temp_c", "loc",
"family", "individual", "field_code",
"x_coord", "y_coord", "xy_coord",
"block", "prev_damage", "curr_damage", "total_damage")
damage_nozero_dam <- damage_full %>%
filter(prev_damage > 0,
curr_damage > 0)
How many saplings received damage from pine weevils?
round((1 - (length(damage_full$site_name) - length(damage_nozero_dam$site_name)) / length(damage_full$seed_zone)) * 100, digits = 1)
## [1] 26.3
scotland_outline <- uk_outline[uk_outline$NAME_1 == "Scotland",]
scotland_fort <- fortify(scotland_outline, region = "NAME_1")
ggplot() +
geom_polygon(aes(x = long, y = lat, group = group),
colour = "black", fill = NA,
data = scotland_fort, alpha = 1) +
geom_point(aes(x = dec_longitude, y = dec_latitude,
colour = seed_zone, shape = big_region),
data = site_loc) +
geom_point(aes(x = long, y = lat), shape = 23, fill = "black",
data = cg_loc) +
# geom_text(aes(x = dec_longitude, y = dec_latitude,
# label = name),
# size = 1.6,
# data = site_loc) +
theme_classic() +
theme(legend.position = "right") +
coord_map() +
xlim(-7.5, 0) +
ylim(54.5, 59.5) +
xlab("Longitude") +
ylab("Latitude")
Map of FOV in Western Europe
europe_fort <- fortify(europe_outline, region = "NUTS_ID")
ggplot() +
geom_polygon(aes(x = long, y = lat, group = group),
colour = "black", fill = "#CCCCCC",
data = europe_fort, alpha = 1) +
geom_polygon(aes(x = long, y = lat, group = group),
colour = NA, fill = "#6B6B6B",
data = scotland_fort, alpha = 1) +
geom_rect(aes(xmin = -8, xmax = 0, ymin = 55, ymax = 60),
colour = "red", fill = NA) +
theme_void() +
theme(legend.position = "right") +
coord_map() +
xlim(-12, 25) +
ylim(35, 70) +
xlab("Longitude") +
ylab("Latitude")
ggplot(site_loc, aes(x = feb_mean_temp_c, y = jul_mean_temp_c)) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
labs(x = "February mean temperature (C)",
y = "July mean temperature (C)") +
theme_classic()
ggplot(site_loc, aes(x = dec_latitude, y = growing_deg_days_c)) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
stat_smooth(method = "lm") +
labs(x = "Decimal latitude",
y = "Growing degree days (c)") +
theme_classic()
Latitude and growing days don’t correlate strongly, regardless of whether big_region or seed_zone are incorporated as groups.
ggplot(site_loc, aes(x = feb_mean_temp_c, y = growing_deg_days_c)) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
labs(x = "February mean temperature (C)",
y = "Growing degree days (c)") +
theme_classic()
ggplot(site_loc, aes(x = jul_mean_temp_c, y = growing_deg_days_c)) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
labs(x = "July mean temperature (C)",
y = "Growing degree days (c)") +
theme_classic()
Both February and July mean temperatures correlate strongly with growing degree days, which makes sense.
site_loc %>%
group_by(seed_zone) %>%
summarise(mean_growing_deg_days_c = mean(growing_deg_days_c),
sd_growing_deg_days_c = sd(growing_deg_days_c)) %>%
mutate(seed_zone = factor(seed_zone,
levels = seed_zone[order(.$mean_growing_deg_days_c)])) %>%
ggplot(aes(x = seed_zone, y = mean_growing_deg_days_c)) +
geom_bar(stat = "identity", aes(fill = seed_zone)) +
geom_errorbar(aes(x = seed_zone,
ymin = (mean_growing_deg_days_c - sd_growing_deg_days_c),
ymax = (mean_growing_deg_days_c + sd_growing_deg_days_c)),
width = 0.5) +
labs(x = "Seed zone",
y = "Mean growing degree days (C)") +
theme_classic() +
theme(legend.position = "none")
There is a good distribution of growing degree days across the seed_zone sites, ranging from min(site_loc$growing_deg_days_c) to max(site_loc$growing_deg_days_c).
damage_full$country <- "scotland"
heirarchy <- damage_full %>%
select(country, big_region, seed_zone, site_code) %>%
distinct()
# transform it to an edge list
edges_level1_2 = heirarchy %>%
select(country, big_region) %>%
unique %>%
rename(from=country, to=big_region)
edges_level2_3 = heirarchy %>%
select(big_region, seed_zone) %>%
unique %>%
rename(from=big_region, to=seed_zone)
edges_level3_4 = heirarchy %>%
select(seed_zone, site_code) %>%
unique %>%
rename(from=seed_zone, to=site_code)
edge_list=rbind(edges_level1_2, edges_level2_3, edges_level3_4)
# Create graph object
dam_graph <- graph_from_data_frame( edge_list )
# Add line weights by damage
region_weights <- damage_full %>%
group_by(big_region) %>%
summarise(damage = sum(curr_damage))
seed_zone_weights <- damage_full %>%
group_by(seed_zone) %>%
summarise(damage = sum(curr_damage)) %>%
mutate(seed_zone = factor(as.character(seed_zone), levels = c("N", "NW", "NC", "EC", "NE", "SW", "SC")))
seed_zone_weights <- seed_zone_weights[order(seed_zone_weights$seed_zone), ]
site_code_weights <- damage_full %>%
group_by(site_code) %>%
summarise(damage = sum(curr_damage)) %>%
mutate(site_code = factor(as.character(site_code), levels = c( "GE", "RD", "SO",
"BE", "LC", "SD", "AM", "GA", "GC", "AB", "GD", "RM", "AC",
"BB", "GT", "CG", "CR", "GL", "BW", "CC", "MG")))
site_code_weights <- site_code_weights[order(site_code_weights$site_code), ]
E(dam_graph)$weight <- c(region_weights$damage, seed_zone_weights$damage, site_code_weights$damage)
ggraph(dam_graph, layout = 'dendrogram', circular = FALSE) +
geom_edge_diagonal(aes(width = weight)) +
geom_node_point() +
geom_node_label(position = "identity", aes(label = name)) +
theme_void() +
annotate("rect", xmin = 0, xmax = 22, ymin = 1.6, ymax = 2.4, fill = "red", alpha = 0.1) +
annotate("rect", xmin = 0, xmax = 22, ymin = 0.6, ymax = 1.4, fill = "blue", alpha = 0.1) +
annotate("rect", xmin = 0, xmax = 22, ymin = -0.2, ymax = 0.4, fill = "green", alpha = 0.1) +
geom_text(aes(x = 24, y = 2), label = "Region", hjust = "right") +
geom_text(aes(x = 24, y = 1), label = "Seed\nzone", hjust = "right") +
geom_text(aes(x = 24, y = 0), label = "Site", hjust = "right")
Lesions from this year (2015), log+1 transformed, according to regions of seed collection:
ggplot(damage_full, aes(x = site_code, y = log1p(curr_damage))) +
geom_violin() +
labs(x = "Population",
y = "Number of lesions in 2015") +
theme_classic()
Zero inflation makes any patterns hard to see.
Previous lesions only, log+1 transformed:
ggplot(damage_full, aes(x = site_code, y = log1p(prev_damage))) +
geom_boxplot() +
labs(x = "Population",
y = "Number of old lesions") +
theme_classic()
All lesions, log+1 transformed:
ggplot(damage_full, aes(x = site_code, y = log1p(total_damage))) +
geom_boxplot() +
labs(x = "Population",
y = "Total number of lesions") +
theme_classic()
Removing the zeroes, looking at variation within groups for those trees that were damaged.
ggplot(damage_nozero_dam, aes(x = site_code, y = log1p(total_damage))) +
geom_boxplot() +
labs(x = "Population",
y = "Total number of lesions") +
theme_classic()
Looking at number of lesions across different groupings:
ggplot(damage_nozero_dam, aes(x = big_region, y = log1p(total_damage))) +
geom_boxplot() +
labs(x = "Region",
y = "Total number of lesions") +
theme_classic()
ggplot(damage_nozero_dam, aes(x = seed_zone, y = log1p(total_damage))) +
geom_boxplot() +
labs(x = "Region",
y = "Total number of lesions") +
theme_classic()
ggplot(damage_full, aes(x = log1p(prev_damage), y = log1p(curr_damage))) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
stat_smooth(method = "lm") +
theme_classic()
Not really, not even if we remove all zeroes:
ggplot(filter(damage_full, prev_damage > 0, curr_damage > 0),
aes(x = log1p(prev_damage), y = log1p(curr_damage))) +
geom_point(aes(colour = seed_zone, shape = big_region)) +
stat_smooth(method = "lm") +
theme_classic()
Testing with spearman’s correlations:
cor_dam <- cor.test(damage_full$prev_damage, damage_full$curr_damage, method = "spearman")
cor_dam_nozero <- cor.test(damage_nozero_dam$prev_damage, damage_nozero_dam$curr_damage, method = "spearman")
cor_dam
##
## Spearman's rank correlation rho
##
## data: damage_full$prev_damage and damage_full$curr_damage
## S = 45564826, p-value = 0.01015
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.09910514
cor_dam_nozero
##
## Spearman's rank correlation rho
##
## data: damage_nozero_dam$prev_damage and damage_nozero_dam$curr_damage
## S = 808708, p-value = 0.09753
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1249418
There isn’t much of a correlation between current damage and previous damage, even when zeroes are removed. Though the relationship is significant for the correlation including zero, presumably because if a tree wasn’t damaged last year, it wouldn’t be damaged again this year.
Trying with a binomial correlation of damaged or not damaged:
damage_full$prev_damage_binom <- ifelse(damage_full$prev_damage > 0, 1, 0)
damage_full$curr_damage_binom <- ifelse(damage_full$curr_damage > 0, 1, 0)
tetrachoric(table(damage_full$prev_damage_binom,damage_full$curr_damage_binom))
## Call: tetrachoric(x = table(damage_full$prev_damage_binom, damage_full$curr_damage_binom))
## tetrachoric correlation
## [1] 0.2
##
## with tau of
## 0 0
## -0.48 0.39
Still no real strong positive correlation.
Are the populations randomly placed on the grid?
ggplot(damage_full, aes(x = x_coord, y = y_coord)) +
geom_point(aes(colour = site_name)) +
coord_equal() +
theme_classic() +
theme(legend.position = "bottom")
It would seem so.
Spatial autocorrelation in total number of lesions:
ggplot(damage_full, aes(x = x_coord, y = y_coord)) +
geom_point(aes(colour = total_damage, size = total_damage)) +
scale_colour_viridis(name = "No. lesions") +
scale_size_continuous(guide = FALSE) +
coord_equal() +
theme_classic() +
theme(legend.position = "bottom")
Spatial autocorrelation in fresh lesions (this year):
ggplot(damage_full, aes(x = x_coord, y = y_coord)) +
geom_point(aes(colour = total_damage, size = curr_damage)) +
scale_colour_viridis(name = "No. lesions (2015)") +
scale_size_continuous(guide = FALSE) +
coord_equal() +
theme_classic() +
theme(legend.position = "bottom")
The weevil damage seems to be clustered in certain areas, possibly the saplings are interacting with each other, with some attracting the weevils and others becoming infected consequentially. The spatial autocorrelation seems to be less pronounced in this year’s lesions than previous years.
curr_dam_semivar <- variogram(log1p(curr_damage)~1, data = damage_full, locations = ~x_coord+y_coord)
curr_dam_semivar_fit = fit.variogram(curr_dam_semivar,
model = vgm(psill = 1, model = "Exp", range = 0.2))
plot(curr_dam_semivar, curr_dam_semivar_fit)
There doesn’t seem to be any spatial auto-correlation, maybe a sill at ~20, but that’s difficult to say.
Removing zero points:
damage_nozero_dam <- damage_full %>%
filter(curr_damage > 0)
curr_dam_no_zero_semivar <- variogram(log1p(curr_damage)~1, data = damage_nozero_dam, locations = ~x_coord+y_coord)
curr_dam_no_zero_semivar_fit = fit.variogram(curr_dam_no_zero_semivar,
model = vgm(psill = 1, model = "Exp", range = 0.2))
plot(curr_dam_no_zero_semivar, curr_dam_no_zero_semivar_fit)
No autocorrelation seen when zeroes are removed, just noise.
Basic Generalised Least Squares model and outputs:
tot_pop_gls <- gls(log1p(curr_damage) ~ site_name , data = damage_full)
vario2 <- variogram(tot_pop_gls$residuals~1, data = damage_full,
locations = ~x_coord + y_coord)
plot(curr_dam_semivar)
summary(tot_pop_gls)
## Generalized least squares fit by REML
## Model: log1p(curr_damage) ~ site_name
## Data: damage_full
## AIC BIC logLik
## 1797.356 1895.883 -876.6779
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 0.5878638 0.1555105 3.780219 0.0002
## site_nameAllt Cul -0.0591716 0.2199251 -0.269053 0.7880
## site_nameAmat -0.0513180 0.2199251 -0.233343 0.8156
## site_nameBallochuie 0.0140024 0.2199251 0.063669 0.9493
## site_nameBeinn Eighe -0.1628533 0.2199251 -0.740495 0.4593
## site_nameBlack Wood of Rannoch 0.0532630 0.2199251 0.242187 0.8087
## site_nameCoille Coire Chuilc 0.2521173 0.2199251 1.146378 0.2521
## site_nameCona Glen -0.2640063 0.2199251 -1.200437 0.2304
## site_nameCrannach -0.2269555 0.2199251 -1.031968 0.3025
## site_nameGlen Affrich 0.0127599 0.2199251 0.058020 0.9538
## site_nameGlen Cannich -0.0703732 0.2199251 -0.319987 0.7491
## site_nameGlen Derry -0.2105783 0.2199251 -0.957500 0.3387
## site_nameGlen Einig -0.1228241 0.2199251 -0.558482 0.5767
## site_nameGlen Loy -0.0027294 0.2199251 -0.012411 0.9901
## site_nameGlen Tanar 0.0153899 0.2199251 0.069978 0.9442
## site_nameLoch Clair 0.1056162 0.2199251 0.480237 0.6312
## site_nameMeggernie -0.1111995 0.2199251 -0.505624 0.6133
## site_nameRhidorroch -0.1900336 0.2199251 -0.864084 0.3879
## site_nameRothiemurchus -0.0561285 0.2199251 -0.255216 0.7986
## site_nameShieldaig -0.1068608 0.2199251 -0.485897 0.6272
## site_nameStrath Oykel -0.1641290 0.2199251 -0.746295 0.4558
##
## Correlation:
## (Intr) st_nAC st_nmA st_nmB st_nBE s_BWoR
## site_nameAllt Cul -0.707
## site_nameAmat -0.707 0.500
## site_nameBallochuie -0.707 0.500 0.500
## site_nameBeinn Eighe -0.707 0.500 0.500 0.500
## site_nameBlack Wood of Rannoch -0.707 0.500 0.500 0.500 0.500
## site_nameCoille Coire Chuilc -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameCona Glen -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameCrannach -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Affrich -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Cannich -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Derry -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Einig -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Loy -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Tanar -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameLoch Clair -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameMeggernie -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameRhidorroch -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameRothiemurchus -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameShieldaig -0.707 0.500 0.500 0.500 0.500 0.500
## site_nameStrath Oykel -0.707 0.500 0.500 0.500 0.500 0.500
## st_CCC st_nCG st_nmC st_nGA st_nGC st_nGD
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen 0.500
## site_nameCrannach 0.500 0.500
## site_nameGlen Affrich 0.500 0.500 0.500
## site_nameGlen Cannich 0.500 0.500 0.500 0.500
## site_nameGlen Derry 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Einig 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Loy 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameGlen Tanar 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameLoch Clair 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameMeggernie 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameRhidorroch 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameRothiemurchus 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameShieldaig 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameStrath Oykel 0.500 0.500 0.500 0.500 0.500 0.500
## st_nGE st_nGL st_nGT st_nLC st_nmM st_nmRh
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy 0.500
## site_nameGlen Tanar 0.500 0.500
## site_nameLoch Clair 0.500 0.500 0.500
## site_nameMeggernie 0.500 0.500 0.500 0.500
## site_nameRhidorroch 0.500 0.500 0.500 0.500 0.500
## site_nameRothiemurchus 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameShieldaig 0.500 0.500 0.500 0.500 0.500 0.500
## site_nameStrath Oykel 0.500 0.500 0.500 0.500 0.500 0.500
## st_nmRt st_nmS
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy
## site_nameGlen Tanar
## site_nameLoch Clair
## site_nameMeggernie
## site_nameRhidorroch
## site_nameRothiemurchus
## site_nameShieldaig 0.500
## site_nameStrath Oykel 0.500 0.500
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -0.9548492 -0.6099189 -0.4816809 0.3590561 4.6227457
##
## Residual standard error: 0.8797002
## Degrees of freedom: 672 total; 651 residual
Construct lots of models of current damage vs. population (site_code) with different spatial correlation structures, then compare with AIC:
m1 <- gls(log1p(curr_damage) ~ site_name, correlation = corExp(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
m2 <- gls(log1p(curr_damage) ~ site_name, correlation = corGaus(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
m2_no_nugget <- gls(log1p(curr_damage) ~ site_name, correlation = corGaus(form = ~x_coord +
y_coord, nugget = FALSE), data = damage_full)
m3 <- gls(log1p(curr_damage) ~ site_name, correlation = corSpher(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
m4 <- gls(log1p(curr_damage) ~ site_name, correlation = corLin(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
m5 <- gls(log1p(curr_damage) ~ site_name, correlation = corRatio(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
m_no_cor <- gls(log1p(curr_damage) ~ site_name, data = damage_full)
m_null <- gls(log1p(curr_damage) ~ 1, data = damage_full)
aic_table <- data.frame(AIC(m1, m2, m2_no_nugget, m3, m4, m5, tot_pop_gls, m_null, m_no_cor)) %>%
mutate(model_name = rownames(.)) %>%
dplyr::select(-df) %>%
arrange(AIC) %>%
dplyr::select(model_name, AIC)
pander(aic_table)
| model_name | AIC |
|---|---|
| m2 | 1710 |
| m5 | 1711 |
| m1 | 1714 |
| m_null | 1735 |
| m2_no_nugget | 1772 |
| m3 | 1778 |
| m4 | 1778 |
| tot_pop_gls | 1797 |
| m_no_cor | 1797 |
m2 (Gaussian spatial autocorrelation) is the best model according to AIC. There is definitely an effect of spatial autocorrelation as the AIC of m_no_cor is the worst fitting model of all and is higher than a null model.
Exploring the best fitted model (m2). First look at a semivariogram of the normalised residuals and the fit of the correlation structure on the raw values:
vario2_resid <- Variogram(m2, form = ~x_coord + y_coord, resType = "normalized")
vario2_fit <- Variogram(m2, form = ~x_coord + y_coord, resType = "pearson")
plot(vario2_resid, smooth = TRUE, ylim = c(0, 1.2))
plot(vario2_fit, ylim = c(0, 1.2))
The fit of the correlation structure isn’t great (blue line), I still don’t understand the semivariogram which decreases at higher distances, but that might not be a massive issue.
Next, look at the standardized residuals of the model:
plot(m2, which = 1:4)
Standardized residuals don’t show any pattern with the fitted values, so that model assumption isn’t violated.
And next the normality of residuals:
qqnorm(m2)
This is dodgy because the data comes from a zero inflated count distribution, even if it has been log transformed since.
Finally, look at the model outputs:
summary(m2)
## Generalized least squares fit by REML
## Model: log1p(curr_damage) ~ site_name
## Data: damage_full
## AIC BIC logLik
## 1709.573 1817.057 -830.7865
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~x_coord + y_coord
## Parameter estimate(s):
## range nugget
## 13.0123497 0.8017255
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 0.5383225 0.2333109 2.3073181 0.0213
## site_nameAllt Cul -0.0517610 0.2019440 -0.2563136 0.7978
## site_nameAmat -0.0342995 0.2030845 -0.1688926 0.8659
## site_nameBallochuie 0.0081113 0.2021927 0.0401165 0.9680
## site_nameBeinn Eighe -0.1627773 0.2023416 -0.8044675 0.4214
## site_nameBlack Wood of Rannoch -0.0263074 0.2023837 -0.1299878 0.8966
## site_nameCoille Coire Chuilc 0.2433496 0.2018345 1.2056888 0.2284
## site_nameCona Glen -0.2753499 0.2024172 -1.3603084 0.1742
## site_nameCrannach -0.1999951 0.2019577 -0.9902822 0.3224
## site_nameGlen Affrich -0.0282355 0.2021435 -0.1396803 0.8890
## site_nameGlen Cannich -0.0995899 0.2020468 -0.4929052 0.6222
## site_nameGlen Derry -0.1935394 0.2030463 -0.9531784 0.3409
## site_nameGlen Einig -0.0948144 0.2025547 -0.4680929 0.6399
## site_nameGlen Loy -0.0561507 0.2020239 -0.2779409 0.7811
## site_nameGlen Tanar 0.0085607 0.2021055 0.0423576 0.9662
## site_nameLoch Clair 0.1196209 0.2017184 0.5930094 0.5534
## site_nameMeggernie -0.1816885 0.2028012 -0.8958944 0.3706
## site_nameRhidorroch -0.2191087 0.2020970 -1.0841762 0.2787
## site_nameRothiemurchus -0.0877616 0.2023606 -0.4336892 0.6647
## site_nameShieldaig -0.0839582 0.2024576 -0.4146951 0.6785
## site_nameStrath Oykel -0.2003973 0.2027189 -0.9885479 0.3233
##
## Correlation:
## (Intr) st_nAC st_nmA st_nmB st_nBE s_BWoR
## site_nameAllt Cul -0.430
## site_nameAmat -0.434 0.499
## site_nameBallochuie -0.434 0.502 0.502
## site_nameBeinn Eighe -0.435 0.498 0.503 0.501
## site_nameBlack Wood of Rannoch -0.424 0.500 0.494 0.501 0.499
## site_nameCoille Coire Chuilc -0.430 0.499 0.495 0.496 0.497 0.495
## site_nameCona Glen -0.439 0.500 0.504 0.502 0.503 0.498
## site_nameCrannach -0.438 0.499 0.499 0.501 0.501 0.497
## site_nameGlen Affrich -0.426 0.500 0.499 0.502 0.501 0.501
## site_nameGlen Cannich -0.433 0.501 0.498 0.502 0.502 0.501
## site_nameGlen Derry -0.432 0.501 0.503 0.502 0.503 0.499
## site_nameGlen Einig -0.440 0.497 0.498 0.500 0.502 0.497
## site_nameGlen Loy -0.427 0.501 0.497 0.501 0.500 0.503
## site_nameGlen Tanar -0.430 0.499 0.503 0.500 0.500 0.494
## site_nameLoch Clair -0.427 0.496 0.496 0.496 0.499 0.496
## site_nameMeggernie -0.431 0.500 0.497 0.503 0.495 0.501
## site_nameRhidorroch -0.429 0.500 0.499 0.502 0.499 0.501
## site_nameRothiemurchus -0.434 0.501 0.504 0.502 0.502 0.499
## site_nameShieldaig -0.438 0.499 0.496 0.500 0.502 0.499
## site_nameStrath Oykel -0.434 0.499 0.499 0.502 0.501 0.501
## st_CCC st_nCG st_nmC st_nGA st_nGC st_nGD
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen 0.500
## site_nameCrannach 0.499 0.503
## site_nameGlen Affrich 0.495 0.499 0.499
## site_nameGlen Cannich 0.495 0.499 0.500 0.501
## site_nameGlen Derry 0.499 0.504 0.503 0.502 0.500
## site_nameGlen Einig 0.498 0.504 0.503 0.495 0.498 0.501
## site_nameGlen Loy 0.499 0.500 0.499 0.501 0.502 0.502
## site_nameGlen Tanar 0.499 0.502 0.499 0.499 0.497 0.501
## site_nameLoch Clair 0.500 0.499 0.500 0.498 0.495 0.499
## site_nameMeggernie 0.493 0.497 0.496 0.502 0.500 0.498
## site_nameRhidorroch 0.498 0.501 0.500 0.501 0.500 0.501
## site_nameRothiemurchus 0.497 0.501 0.500 0.503 0.501 0.505
## site_nameShieldaig 0.498 0.501 0.502 0.499 0.500 0.500
## site_nameStrath Oykel 0.499 0.503 0.501 0.500 0.499 0.504
## st_nGE st_nGL st_nGT st_nLC st_nmM st_nmRh
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy 0.498
## site_nameGlen Tanar 0.497 0.497
## site_nameLoch Clair 0.499 0.498 0.499
## site_nameMeggernie 0.493 0.501 0.495 0.493
## site_nameRhidorroch 0.499 0.501 0.501 0.498 0.499
## site_nameRothiemurchus 0.497 0.500 0.503 0.498 0.500 0.501
## site_nameShieldaig 0.502 0.500 0.498 0.500 0.494 0.502
## site_nameStrath Oykel 0.502 0.502 0.500 0.498 0.498 0.503
## st_nmRt st_nmS
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy
## site_nameGlen Tanar
## site_nameLoch Clair
## site_nameMeggernie
## site_nameRhidorroch
## site_nameRothiemurchus
## site_nameShieldaig 0.498
## site_nameStrath Oykel 0.502 0.503
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -0.8703802 -0.5417790 -0.3839108 0.3896953 4.6617845
##
## Residual standard error: 0.8980812
## Degrees of freedom: 672 total; 651 residual
anova(m2)
## Denom. DF: 651
## numDF F-value p-value
## (Intercept) 1 5.889275 0.0155
## site_name 20 0.709936 0.8182
r.squaredLR(m2, null = m_null)
## [1] 0.1438731
## attr(,"adj.r.squared")
## [1] 0.1558144
r.squaredLR(m2)
## [1] 0.1438731
## attr(,"adj.r.squared")
## [1] 0.1558144
No significant effect of population on weevil damage once controlled for spatial structure (p = 0.156) and the best model accounts for only 15.6% of the variation in weevil damage, regardless of whether the null model was specified.
m2_fam_nest <- gls(log1p(curr_damage) ~ family:site_name, correlation = corGaus(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_full)
## Error in glsEstimate(object, control = control): computed "gls" fit is singular, rank 169
The model won’t converge as the model has a singular fit. I think this is caused by some nested groups having near zero variance, I can remove these groups. It’s probably because they have no lesions across an entire group.
m2_fam <- gls(log1p(curr_damage) ~ family,
correlation = corGaus(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
m1_fam <- gls(log1p(curr_damage) ~ family,
correlation = corExp(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
m3_fam <- gls(log1p(curr_damage) ~ family,
correlation = corSpher(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
m4_fam <- gls(log1p(curr_damage) ~ family,
correlation = corLin(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
m5_fam <- gls(log1p(curr_damage) ~ family,
correlation = corRatio(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
aic_table <- data.frame(AIC(m1, m2, m2_fam, m2_no_nugget, m3, m4, m5, tot_pop_gls, m_null, m1_fam, m3_fam, m4_fam, m5_fam, m_no_cor)) %>%
mutate(model_name = rownames(.)) %>%
dplyr::select(-df) %>%
arrange(AIC) %>%
dplyr::select(model_name, AIC)
pander(aic_table)
| model_name | AIC |
|---|---|
| m2 | 1710 |
| m5 | 1711 |
| m1 | 1714 |
| m_null | 1735 |
| m2_no_nugget | 1772 |
| m3 | 1778 |
| m4 | 1778 |
| tot_pop_gls | 1797 |
| m_no_cor | 1797 |
| m2_fam | 1819 |
| m5_fam | 1820 |
| m1_fam | 1824 |
| m4_fam | 1881 |
| m3_fam | 1881 |
Currently, the GLS with family nested within population doesn’t converge due to singularities, I think this is because some of the smaller groups don’t have any lesions, so the variance of the group is zero. If I just include family level effects and compare to the previous population level models, I still don’t get any better than the population level Gaussian correlation structure model (m2).
Anova(m2, type = "II")
## Analysis of Deviance Table (Type II tests)
##
## Response: log1p(curr_damage)
## Df Chisq Pr(>Chisq)
## site_name 20 14.199 0.8203
Anova(m2_fam, type = "II")
## Analysis of Deviance Table (Type II tests)
##
## Response: log1p(curr_damage)
## Df Chisq Pr(>Chisq)
## family 167 147.35 0.8608
curr_fam_lmer <- lmer(log1p(curr_damage) ~ family + (1|site_name), data = damage_full)
curr_pop_lmer <- lmer(log1p(curr_damage) ~ site_name + (1|block), data = damage_full)
curr_lmer_rand <- lmer(log1p(curr_damage) ~ (1|site_name), data = damage_full)
## boundary (singular) fit: see ?isSingular
curr_lmer_null <- lm(log1p(curr_damage) ~ 1, data = damage_full)
summary(curr_fam_lmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log1p(curr_damage) ~ family + (1 | site_name)
## Data: damage_full
##
## REML criterion at convergence: 1552.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8830 -0.5800 -0.1933 0.3867 3.8016
##
## Random effects:
## Groups Name Variance Std.Dev.
## site_name (Intercept) 0.01532 0.1238
## Residual 0.80340 0.8963
## Number of obs: 672, groups: site_name, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.199e-01 4.649e-01 1.118
## familyAB10 3.132e-01 6.338e-01 0.494
## familyAB2 4.024e-01 6.338e-01 0.635
## familyAB3 3.543e-01 6.338e-01 0.559
## familyAB4 2.849e-01 6.338e-01 0.449
## familyAB5 -5.199e-01 6.338e-01 -0.820
## familyAB6 5.579e-02 6.338e-01 0.088
## familyAB9 -3.466e-01 6.338e-01 -0.547
## familyAC1 -2.452e-01 6.575e-01 -0.373
## familyAC2 -3.466e-01 6.575e-01 -0.527
## familyAC3 3.303e-14 6.575e-01 0.000
## familyAC5 1.014e-01 6.575e-01 0.154
## familyAC6 -1.733e-01 6.575e-01 -0.264
## familyAC7 5.878e-01 6.575e-01 0.894
## familyAC8 -1.175e-01 6.575e-01 -0.179
## familyAC9 2.640e-01 6.575e-01 0.402
## familyAM1 1.572e-01 6.575e-01 0.239
## familyAM10 5.199e-01 6.575e-01 0.791
## familyAM3 -5.199e-01 6.575e-01 -0.791
## familyAM4 1.088e+00 6.575e-01 1.655
## familyAM5 -5.199e-01 6.575e-01 -0.791
## familyAM6 1.014e-01 6.575e-01 0.154
## familyAM8 -1.733e-01 6.575e-01 -0.264
## familyAM9 -5.199e-01 6.575e-01 -0.791
## familyBB1 -2.452e-01 6.575e-01 -0.373
## familyBB2 4.479e-01 6.575e-01 0.681
## familyBB3 2.945e-02 6.575e-01 0.045
## familyBB4 2.291e-01 6.575e-01 0.348
## familyBB5 -3.466e-01 6.575e-01 -0.527
## familyBB7 9.599e-01 6.575e-01 1.460
## familyBB8 -3.466e-01 6.575e-01 -0.527
## familyBB9 -7.192e-02 6.575e-01 -0.109
## familyBE1 7.489e-01 6.575e-01 1.139
## familyBE2 1.572e-01 6.575e-01 0.239
## familyBE3 -1.733e-01 6.575e-01 -0.264
## familyBE4 -5.199e-01 6.575e-01 -0.791
## familyBE5 -1.733e-01 6.575e-01 -0.264
## familyBE7 -5.199e-01 6.575e-01 -0.791
## familyBE8 -2.452e-01 6.575e-01 -0.373
## familyBE9 -3.338e-02 6.575e-01 -0.051
## familyBW10 -3.466e-01 6.575e-01 -0.527
## familyBW2 5.579e-02 6.575e-01 0.085
## familyBW3 -3.338e-02 6.575e-01 -0.051
## familyBW4 9.344e-01 6.575e-01 1.421
## familyBW6 7.065e-01 6.575e-01 1.074
## familyBW7 3.466e-01 6.575e-01 0.527
## familyBW8 -3.466e-01 6.575e-01 -0.527
## familyBW9 -3.466e-01 6.575e-01 -0.527
## familyCC1 -3.466e-01 6.575e-01 -0.527
## familyCC10 1.168e+00 6.575e-01 1.776
## familyCC3 5.037e-01 6.575e-01 0.766
## familyCC4 2.747e-01 6.575e-01 0.418
## familyCC5 -1.733e-01 6.575e-01 -0.264
## familyCC6 -3.338e-02 6.575e-01 -0.051
## familyCC7 8.375e-01 6.575e-01 1.274
## familyCC9 3.304e-01 6.575e-01 0.503
## familyCG1 -5.199e-01 6.575e-01 -0.791
## familyCG2 -3.466e-01 6.575e-01 -0.527
## familyCG3 7.008e-01 6.575e-01 1.066
## familyCG4 -7.192e-02 6.575e-01 -0.109
## familyCG5 -5.199e-01 6.575e-01 -0.791
## familyCG6 -3.466e-01 6.575e-01 -0.527
## familyCG7 -3.466e-01 6.575e-01 -0.527
## familyCG8 -1.175e-01 6.575e-01 -0.179
## familyCR10 -1.733e-01 6.575e-01 -0.264
## familyCR2 1.014e-01 6.575e-01 0.154
## familyCR3 -7.192e-02 6.575e-01 -0.109
## familyCR4 3.304e-01 6.575e-01 0.503
## familyCR6 -5.199e-01 6.575e-01 -0.791
## familyCR7 -3.466e-01 6.575e-01 -0.527
## familyCR8 -3.466e-01 6.575e-01 -0.527
## familyCR9 -2.452e-01 6.575e-01 -0.373
## familyGA1 -2.452e-01 6.575e-01 -0.373
## familyGA10 -1.733e-01 6.575e-01 -0.264
## familyGA3 4.680e-01 6.575e-01 0.712
## familyGA5 -3.466e-01 6.575e-01 -0.527
## familyGA6 2.529e-01 6.575e-01 0.385
## familyGA7 2.027e-01 6.575e-01 0.308
## familyGA8 3.304e-01 6.575e-01 0.503
## familyGA9 1.572e-01 6.575e-01 0.239
## familyGC1 2.945e-02 6.575e-01 0.045
## familyGC10 -3.466e-01 6.575e-01 -0.527
## familyGC2 -2.452e-01 6.575e-01 -0.373
## familyGC3 3.466e-01 6.575e-01 0.527
## familyGC5 -1.175e-01 6.575e-01 -0.179
## familyGC6 3.304e-01 6.575e-01 0.503
## familyGC8 1.572e-01 6.575e-01 0.239
## familyGC9 -1.733e-01 6.575e-01 -0.264
## familyGD1 1.014e-01 6.575e-01 0.154
## familyGD10 -3.466e-01 6.575e-01 -0.527
## familyGD2 1.733e-01 6.575e-01 0.264
## familyGD4 2.162e-01 6.575e-01 0.329
## familyGD5 -5.199e-01 6.575e-01 -0.791
## familyGD6 -5.199e-01 6.575e-01 -0.791
## familyGD7 1.014e-01 6.575e-01 0.154
## familyGD9 -3.466e-01 6.575e-01 -0.527
## familyGE1 -3.466e-01 6.575e-01 -0.527
## familyGE2 1.733e-01 6.575e-01 0.264
## familyGE3 -2.452e-01 6.575e-01 -0.373
## familyGE4 -5.199e-01 6.575e-01 -0.791
## familyGE6 -7.192e-02 6.575e-01 -0.109
## familyGE7 9.517e-01 6.575e-01 1.447
## familyGE8 -3.466e-01 6.575e-01 -0.527
## familyGE9 -3.338e-02 6.575e-01 -0.051
## familyGL1 2.747e-01 6.575e-01 0.418
## familyGL10 -1.733e-01 6.575e-01 -0.264
## familyGL2 -3.466e-01 6.575e-01 -0.527
## familyGL3 5.579e-02 6.575e-01 0.085
## familyGL4 1.399e-01 6.575e-01 0.213
## familyGL6 4.318e-01 6.575e-01 0.657
## familyGL7 3.132e-01 6.575e-01 0.476
## familyGL8 -1.733e-01 6.575e-01 -0.264
## familyGT1 3.304e-01 6.575e-01 0.503
## familyGT2 -3.466e-01 6.575e-01 -0.527
## familyGT4 3.304e-01 6.575e-01 0.503
## familyGT5 4.581e-01 6.575e-01 0.697
## familyGT6 -1.733e-01 6.575e-01 -0.264
## familyGT7 -3.466e-01 6.575e-01 -0.527
## familyGT8 3.132e-01 6.575e-01 0.476
## familyGT9 1.014e-01 6.575e-01 0.154
## familyLC1 1.095e+00 6.575e-01 1.665
## familyLC10 3.304e-01 6.575e-01 0.503
## familyLC2 2.945e-02 6.575e-01 0.045
## familyLC3 -2.452e-01 6.575e-01 -0.373
## familyLC4 -1.733e-01 6.575e-01 -0.264
## familyLC5 -3.466e-01 6.575e-01 -0.527
## familyLC6 5.579e-02 6.575e-01 0.085
## familyLC7 6.436e-01 6.575e-01 0.979
## familyMG10 -3.466e-01 6.575e-01 -0.527
## familyMG2 1.733e-01 6.575e-01 0.264
## familyMG3 -5.199e-01 6.575e-01 -0.791
## familyMG5 -5.199e-01 6.575e-01 -0.791
## familyMG6 5.579e-02 6.575e-01 0.085
## familyMG7 1.399e-01 6.575e-01 0.213
## familyMG8 6.160e-01 6.575e-01 0.937
## familyMG9 5.579e-02 6.575e-01 0.085
## familyRD1 5.756e-01 6.575e-01 0.875
## familyRD10 -5.199e-01 6.575e-01 -0.791
## familyRD2 5.995e-01 6.575e-01 0.912
## familyRD3 -5.199e-01 6.575e-01 -0.791
## familyRD5 -1.733e-01 6.575e-01 -0.264
## familyRD7 1.014e-01 6.575e-01 0.154
## familyRD8 -5.199e-01 6.575e-01 -0.791
## familyRD9 -5.199e-01 6.575e-01 -0.791
## familyRM1 -5.199e-01 6.575e-01 -0.791
## familyRM10 -5.199e-01 6.575e-01 -0.791
## familyRM3 -7.192e-02 6.575e-01 -0.109
## familyRM4 3.328e-14 6.575e-01 0.000
## familyRM5 -1.733e-01 6.575e-01 -0.264
## familyRM6 5.276e-01 6.575e-01 0.802
## familyRM7 3.326e-14 6.575e-01 0.000
## familyRM9 8.524e-01 6.575e-01 1.296
## familySD1 1.572e-01 6.575e-01 0.239
## familySD10 5.579e-02 6.575e-01 0.085
## familySD2 1.733e-01 6.575e-01 0.264
## familySD4 -5.199e-01 6.575e-01 -0.791
## familySD6 -5.199e-01 6.575e-01 -0.791
## familySD7 3.760e-01 6.575e-01 0.572
## familySD8 -5.199e-01 6.575e-01 -0.791
## familySD9 4.865e-01 6.575e-01 0.740
## familySO10 -5.199e-01 6.575e-01 -0.791
## familySO2 -5.199e-01 6.575e-01 -0.791
## familySO3 -3.466e-01 6.575e-01 -0.527
## familySO4 1.733e-01 6.575e-01 0.264
## familySO5 1.399e-01 6.575e-01 0.213
## familySO6 4.774e-01 6.575e-01 0.726
## familySO7 3.466e-01 6.575e-01 0.527
## familySO9 -5.199e-01 6.575e-01 -0.791
##
## Correlation matrix not shown by default, as p = 168 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## convergence code: 0
## unable to evaluate scaled gradient
## Hessian is numerically singular: parameters are not uniquely determined
aic_table_lmer <- data.frame(AIC(curr_fam_lmer, curr_lmer_rand, curr_lmer_null, curr_pop_lmer)) %>%
mutate(model_name = rownames(.)) %>%
dplyr::select(-df) %>%
arrange(AIC) %>%
dplyr::select(model_name, AIC)
pander(aic_table_lmer)
| model_name | AIC |
|---|---|
| curr_lmer_null | 1730 |
| curr_lmer_rand | 1737 |
| curr_pop_lmer | 1745 |
| curr_fam_lmer | 1893 |
The null model is better than any family level mixed effects model, and is better than a mixed model including population. This null result is likely because I am unable to include the spatial autocorrelation structure in the linear model, and this takes up a lot of the variance.
Family level groupings didn’t appear to predict weevil damage at all and population (site) level groupings don’t predict very much either, so what if I group into larger groups such as seed_zone and big_region?
ggplot(damage_full, aes(x = seed_zone, y = log1p(curr_damage))) +
geom_boxplot(aes(fill = seed_zone)) +
theme_classic() +
theme(legend.position = "none")
The following model uses the same auto-correlation structure as the previous models, but uses seed zone rather than family as the categorical predictor.
m2_sz <- gls(curr_damage ~ seed_zone,
correlation = corGaus(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
summary(m2_sz)
## Generalized least squares fit by REML
## Model: curr_damage ~ seed_zone
## Data: damage_full
## AIC BIC logLik
## 4302.714 4347.712 -2141.357
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~x_coord + y_coord
## Parameter estimate(s):
## range nugget
## 12.2664550 0.8793768
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.6481584 1.1450844 1.4393336 0.1505
## seed_zoneN -0.2320922 0.8466448 -0.2741317 0.7841
## seed_zoneNC -0.0275360 0.8435759 -0.0326420 0.9740
## seed_zoneNE 0.0067640 0.8433457 0.0080204 0.9936
## seed_zoneNW 0.1768036 0.8460896 0.2089656 0.8345
## seed_zoneSC 1.2492957 0.8460666 1.4765926 0.1403
## seed_zoneSW -0.4335683 0.8440933 -0.5136497 0.6077
##
## Correlation:
## (Intr) sd_znN sd_zNC sd_zNE sd_zNW sd_zSC
## seed_zoneN -0.373
## seed_zoneNC -0.365 0.495
## seed_zoneNE -0.366 0.498 0.500
## seed_zoneNW -0.371 0.501 0.497 0.496
## seed_zoneSC -0.364 0.499 0.497 0.499 0.496
## seed_zoneSW -0.372 0.502 0.498 0.498 0.501 0.500
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -0.46653804 -0.26646987 -0.22801008 -0.05888329 14.50800050
##
## Residual standard error: 6.210542
## Degrees of freedom: 672 total; 665 residual
Anova(m2_sz)
## Analysis of Deviance Table (Type II tests)
##
## Response: curr_damage
## Df Chisq Pr(>Chisq)
## seed_zone 6 4.8999 0.5567
This model uses big_region as the categorical predictor:
m2_br <- gls(curr_damage ~ big_region,
correlation = corGaus(form = ~x_coord + y_coord, nugget = TRUE),
data = damage_full)
summary(m2_br)
## Generalized least squares fit by REML
## Model: curr_damage ~ big_region
## Data: damage_full
## AIC BIC logLik
## 4304.635 4331.67 -2146.318
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~x_coord + y_coord
## Parameter estimate(s):
## range nugget
## 12.2403745 0.8774382
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.6029194 1.0417223 1.538720 0.1243
## big_regionB 0.0272063 0.5460247 0.049826 0.9603
## big_regionC 0.4289262 0.5458577 0.785784 0.4323
##
## Correlation:
## (Intr) bg_rgB
## big_regionB -0.205
## big_regionC -0.206 0.400
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -0.32680600 -0.26219257 -0.25781667 -0.09697472 14.63149454
##
## Residual standard error: 6.217284
## Degrees of freedom: 672 total; 669 residual
Anova(m2_br, type = "II")
## Analysis of Deviance Table (Type II tests)
##
## Response: curr_damage
## Df Chisq Pr(>Chisq)
## big_region 2 0.7005 0.7045
aic_table <- data.frame(AIC(m1, m2, m2_no_nugget, m3, m4, m5, tot_pop_gls, m_null, m1_fam, m2_fam, m3_fam, m4_fam, m5_fam, m2_sz, m2_br)) %>%
mutate(model_name = rownames(.)) %>%
dplyr::select(-df) %>%
arrange(AIC) %>%
dplyr::select(model_name, AIC)
pander(aic_table)
| model_name | AIC |
|---|---|
| m2 | 1710 |
| m5 | 1711 |
| m1 | 1714 |
| m_null | 1735 |
| m2_no_nugget | 1772 |
| m3 | 1778 |
| m4 | 1778 |
| tot_pop_gls | 1797 |
| m2_fam | 1819 |
| m5_fam | 1820 |
| m1_fam | 1824 |
| m4_fam | 1881 |
| m3_fam | 1881 |
| m2_sz | 4303 |
| m2_br | 4305 |
Seed zone accounts for more variance than big_region, but both models are vastly worse at predicting weevil damage than population level models.
Bearing in mind that this will change the sample size of each group. By how much?
damage_full %>%
filter(curr_damage > 0) %>%
group_by(site_code) %>%
tally() %>%
arrange(n) %>%
pander()
| site_code | n |
|---|---|
| RD | 6 |
| AM | 8 |
| CG | 8 |
| GE | 9 |
| MG | 9 |
| SO | 9 |
| AB | 10 |
| BE | 10 |
| AC | 11 |
| CR | 11 |
| GC | 11 |
| GD | 11 |
| RM | 11 |
| SD | 11 |
| BW | 13 |
| GA | 13 |
| GL | 13 |
| BB | 14 |
| CC | 15 |
| GT | 15 |
| LC | 15 |
Minimum group size is 6, maximum group size is 15.
m2_nozero <- gls(log1p(curr_damage) ~ site_name, correlation = corGaus(form = ~x_coord +
y_coord, nugget = TRUE), data = damage_nozero_dam)
The model accounts for more variation in damage by site when the zeroes are removed.
The best model had no random effect structure and instead just included a spatial autocorrelation term to account for plot effects and excluded trees with no lesions. Of the trees that had lesions these are the model results:
AIC(m2_nozero)
## [1] 602.9439
r.squaredLR(m2_nozero)
## [1] 0.1842552
## attr(,"adj.r.squared")
## [1] 0.2007475
summary(m2_nozero)
## Generalized least squares fit by REML
## Model: log1p(curr_damage) ~ site_name
## Data: damage_nozero_dam
## AIC BIC logLik
## 602.9439 683.502 -277.4719
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~x_coord + y_coord
## Parameter estimate(s):
## range nugget
## 13.024809 0.810506
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.5715525 0.3066857 5.124310 0.0000
## site_nameAllt Cul -0.2621646 0.3417510 -0.767122 0.4439
## site_nameAmat 0.4446032 0.3742521 1.187978 0.2362
## site_nameBallochuie -0.2803383 0.3243420 -0.864329 0.3884
## site_nameBeinn Eighe -0.4074252 0.3495289 -1.165641 0.2451
## site_nameBlack Wood of Rannoch -0.1600983 0.3276092 -0.488687 0.6256
## site_nameCoille Coire Chuilc 0.1586384 0.3203834 0.495152 0.6210
## site_nameCona Glen -0.4571068 0.3694983 -1.237101 0.2174
## site_nameCrannach -0.7424013 0.3401989 -2.182257 0.0302
## site_nameGlen Affrich -0.3484625 0.3274738 -1.064093 0.2885
## site_nameGlen Cannich -0.3284320 0.3395748 -0.967186 0.3346
## site_nameGlen Derry -0.5534607 0.3422741 -1.617010 0.1074
## site_nameGlen Einig -0.0050535 0.3592981 -0.014065 0.9888
## site_nameGlen Loy -0.3063341 0.3297626 -0.928953 0.3540
## site_nameGlen Tanar -0.3828466 0.3188752 -1.200615 0.2312
## site_nameLoch Clair -0.2967415 0.3180859 -0.932897 0.3519
## site_nameMeggernie 0.0115606 0.3637799 0.031779 0.9747
## site_nameRhidorroch 0.2300337 0.4018262 0.572471 0.5676
## site_nameRothiemurchus -0.2480708 0.3403476 -0.728875 0.4669
## site_nameShieldaig -0.2625104 0.3432720 -0.764730 0.4453
## site_nameStrath Oykel -0.2940849 0.3598217 -0.817307 0.4147
##
## Correlation:
## (Intr) st_nAC st_nmA st_nmB st_nBE s_BWoR
## site_nameAllt Cul -0.583
## site_nameAmat -0.548 0.486
## site_nameBallochuie -0.623 0.550 0.511
## site_nameBeinn Eighe -0.576 0.500 0.472 0.544
## site_nameBlack Wood of Rannoch -0.604 0.543 0.499 0.569 0.521
## site_nameCoille Coire Chuilc -0.636 0.554 0.519 0.598 0.549 0.574
## site_nameCona Glen -0.545 0.479 0.446 0.509 0.475 0.500
## site_nameCrannach -0.584 0.518 0.482 0.553 0.515 0.535
## site_nameGlen Affrich -0.600 0.541 0.496 0.574 0.529 0.559
## site_nameGlen Cannich -0.581 0.524 0.481 0.549 0.509 0.543
## site_nameGlen Derry -0.591 0.523 0.490 0.558 0.513 0.541
## site_nameGlen Einig -0.569 0.499 0.465 0.532 0.490 0.515
## site_nameGlen Loy -0.606 0.538 0.499 0.582 0.537 0.562
## site_nameGlen Tanar -0.634 0.559 0.523 0.599 0.551 0.579
## site_nameLoch Clair -0.617 0.555 0.504 0.586 0.538 0.586
## site_nameMeggernie -0.551 0.502 0.457 0.533 0.484 0.509
## site_nameRhidorroch -0.491 0.443 0.404 0.463 0.427 0.453
## site_nameRothiemurchus -0.582 0.520 0.480 0.555 0.509 0.542
## site_nameShieldaig -0.592 0.513 0.484 0.548 0.506 0.546
## site_nameStrath Oykel -0.553 0.494 0.457 0.526 0.481 0.517
## st_CCC st_nCG st_nmC st_nGA st_nGC st_nGD
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen 0.515
## site_nameCrannach 0.559 0.483
## site_nameGlen Affrich 0.573 0.496 0.543
## site_nameGlen Cannich 0.552 0.485 0.524 0.540
## site_nameGlen Derry 0.563 0.481 0.525 0.546 0.519
## site_nameGlen Einig 0.538 0.458 0.496 0.516 0.496 0.505
## site_nameGlen Loy 0.581 0.502 0.540 0.566 0.541 0.546
## site_nameGlen Tanar 0.606 0.521 0.563 0.578 0.559 0.564
## site_nameLoch Clair 0.591 0.513 0.551 0.577 0.556 0.553
## site_nameMeggernie 0.530 0.455 0.489 0.520 0.493 0.497
## site_nameRhidorroch 0.467 0.408 0.443 0.451 0.447 0.434
## site_nameRothiemurchus 0.558 0.481 0.521 0.543 0.518 0.522
## site_nameShieldaig 0.559 0.479 0.516 0.531 0.516 0.520
## site_nameStrath Oykel 0.531 0.459 0.491 0.511 0.495 0.491
## st_nGE st_nGL st_nGT st_nLC st_nmM st_nmRh
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy 0.520
## site_nameGlen Tanar 0.537 0.584
## site_nameLoch Clair 0.525 0.582 0.595
## site_nameMeggernie 0.478 0.526 0.534 0.527
## site_nameRhidorroch 0.413 0.452 0.473 0.465 0.414
## site_nameRothiemurchus 0.493 0.547 0.562 0.563 0.498 0.439
## site_nameShieldaig 0.495 0.542 0.558 0.561 0.486 0.430
## site_nameStrath Oykel 0.467 0.522 0.533 0.536 0.475 0.418
## st_nmRt st_nmS
## site_nameAllt Cul
## site_nameAmat
## site_nameBallochuie
## site_nameBeinn Eighe
## site_nameBlack Wood of Rannoch
## site_nameCoille Coire Chuilc
## site_nameCona Glen
## site_nameCrannach
## site_nameGlen Affrich
## site_nameGlen Cannich
## site_nameGlen Derry
## site_nameGlen Einig
## site_nameGlen Loy
## site_nameGlen Tanar
## site_nameLoch Clair
## site_nameMeggernie
## site_nameRhidorroch
## site_nameRothiemurchus
## site_nameShieldaig 0.522
## site_nameStrath Oykel 0.504 0.499
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.29317025 -0.57814801 -0.02935295 0.79720197 3.45352255
##
## Residual standard error: 0.8571485
## Degrees of freedom: 233 total; 212 residual
Anova(m2_nozero)
## Analysis of Deviance Table (Type II tests)
##
## Response: log1p(curr_damage)
## Df Chisq Pr(>Chisq)
## site_name 20 23.509 0.2645